R Markdown

# Load necessary libraries 
set.seed(123)
library(ggplot2)
library(splines)
library(minpack.lm)
#install.packages("pracma") 
library(pracma)
#install.packages("nls2")
library(nls2)
## Loading required package: proto

Including Plots

Data Cleaning and Plotting

# Load and process the  
library(readxl)
Data_McManus2_WT_P2L_AAVCremCherry <- read_excel("Data_McManus2_WT_P2L_AAVCremCherry.xlsx")
## New names:
## • `` -> `...1`
df_MC <- as.data.frame(Data_McManus2_WT_P2L_AAVCremCherry)

# Rename columns for clarity
colnames(df_MC) <- c("Time", "X1", "X2", "X3", "X4")

# Remove outliers where Time is between 100 and 100.1
df_MC <- df_MC[!(df_MC$Time >= 100 & df_MC$Time <= 100.1), ]

# Plot each species
ggplot(df_MC, aes(x = Time)) +
  geom_line(aes(y = X1, color = "X1")) +
  geom_line(aes(y = X2, color = "X2")) +
  geom_line(aes(y = X3, color = "X3")) +
  geom_line(aes(y = X4, color = "X4")) +
  labs(title = "Data without Outliers", x = "Time", y = "X(t_i)") +
  scale_color_manual(values = c("X1" = "blue", "X2" = "green", "X3" = "red", "X4" = "purple")) +
  theme_minimal()

spline_X1 <- predict(smooth.spline(df_MC$Time, df_MC$X1),    spar =)
spline_X2 <- predict(smooth.spline(df_MC$Time, df_MC$X2))
spline_X3 <- predict(smooth.spline(df_MC$Time, df_MC$X3))
spline_X4 <- predict(smooth.spline(df_MC$Time,df_MC$X4))

# Plot the spline-smoothed data
plot(df_MC$Time, spline_X1$y, type = "l", col = "red", lwd = 2, 
     xlab = "Time", ylab = "Values", main = " Splines", 
     ylim = range(c(spline_X1$y, spline_X2$y, spline_X3$y, spline_X4)))
lines(df_MC$Time, spline_X2$y, col = "blue", lwd = 2)
lines(df_MC$Time, spline_X3$y, col = "green", lwd = 2)
lines(df_MC$Time, spline_X4$y, col = "purple", lwd = 2)

legend("topright", legend = c("spline_X1", "spline_X2", "spline_X3", "spline_X4"), 
       col = c("red", "blue", "green", "purple"), lwd = 2)

# Plot the spline-smoothed data
spline_X1 <- predict(smooth.spline(df_MC$Time, df_MC$X1, spar=1)) #black line in 2nd #plot(average for X1)

plot(df_MC$Time, df_MC$X1, type = "l", col = "yellow", lwd = 2, 
     xlab = "Time", ylab = "Values", main = " Splines", 
     ylim = range(c(spline_X1$y, spline_X2$y, spline_X3$y, spline_X4)))
lines(df_MC$Time, spline_X1$y)

plot(df_MC$Time, df_MC$X1-spline_X1$y, type = "l", col = "yellow", lwd = 2, 
     xlab = "Time", ylab = "Values", main = " Detrended Splines")

x <- 1:length(df_MC$Time)  
y <- df_MC$X1-spline_X1$y    

maxima <- findpeaks(df_MC$X1-spline_X1$y, minpeakheight=0)  
minima <- findpeaks(-df_MC$X1-spline_X1$y, minpeakheight=0) 

x_max <- maxima[,2]  
y_max <- maxima[,1]  


fit <- nls(y_max ~ a * exp(-b * x_max) + c, start = list(a = max(y_max), b = 0.01, c = min(y_max)))

summary(fit)
## 
## Formula: y_max ~ a * exp(-b * x_max) + c
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a 1.115e+03  9.552e+00  116.69   <2e-16 ***
## b 5.000e-04  1.421e-05   35.18   <2e-16 ***
## c 2.142e+02  1.051e+01   20.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.65 on 98 degrees of freedom
## 
## Number of iterations to convergence: 13 
## Achieved convergence tolerance: 5.184e-06
a <- coef(fit)["a"]
b <- coef(fit)["b"]
c <- coef(fit)["c"]

plot(x, y, type="l", col="yellow", main="Detrended Splines with Exponential Fit")
lines(x_max, predict(fit, list(x_max = x_max)), col="red", lwd=2)  #

Simulate Data Based on the Fitted Model

t <- seq(0, 100, by = 0.1)

model_function <- function(t, A, gamma, omega, phi, y_shift) {
  A * exp(-gamma * t ) * cos(omega * t + phi) + y_shift
}

# using estimated parameters to def simulating function
simulate_data <- function(t, params, sigma) {
  predicted <- model_function(t, params[1], params[2], params[3], params[4], params[5])
  noise <- rnorm(length(t), mean = 0, sd = sigma)  #using sigma0 to produce noise 
  simulated_data <- predicted + noise
  return(simulated_data)
}
sigma0 = 10
param0=c(1000, 0.025, 0.9, 0,0)
simulated_data <- simulate_data(t, param0, sigma0 )
plot(t, simulated_data, type = "l", col = "blue", main = "Simulated Data", xlab = "Time", ylab = "Simulated Values")

Task1

generate simulating data 1 time

true_params <- c(A = 1000, gamma = 0.025, omega = 0.9, phi = 0, y_shift = 0)

sigma0 <- 10
t <- seq(0, 100, by = 0.1)
set.seed(123)
y_sim <- model_function(t, true_params[1], true_params[2], true_params[3], true_params[4], true_params[5]) + 
         rnorm(length(t), mean = 0, sd = sigma0)

# default algorithm in nls: Gauss-Newton
fit_nls <- nls(
  y_sim ~ model_function(t, A, gamma, omega, phi, y_shift),
  start = list(A = 950, gamma = 0.03, omega = 0.85, phi = 0.1, y_shift = 0), #initial value close to true ones
  data = data.frame(t = t, y_sim = y_sim)
)
# 
summary(fit_nls)
## 
## Formula: y_sim ~ model_function(t, A, gamma, omega, phi, y_shift)
## 
## Parameters:
##           Estimate Std. Error   t value Pr(>|t|)    
## A        1.001e+03  1.447e+00   691.477   <2e-16 ***
## gamma    2.503e-02  5.468e-05   457.732   <2e-16 ***
## omega    9.000e-01  5.483e-05 16413.378   <2e-16 ***
## phi     -2.465e-04  1.459e-03    -0.169    0.866    
## y_shift  1.495e-01  3.144e-01     0.475    0.635    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.935 on 996 degrees of freedom
## 
## Number of iterations to convergence: 9 
## Achieved convergence tolerance: 7.293e-06
coef(fit_nls)
##             A         gamma         omega           phi       y_shift 
##  1.000598e+03  2.503082e-02  9.000001e-01 -2.464913e-04  1.494642e-01

##Task 2 find peaks

t <- seq(0, 100, by = 0.1)
# find peaks
peaks <- findpeaks(y_sim, minpeakheight = 100, minpeakdistance = 15) 

# peaks[, 2] :index of peaks in (1~1000)
#t[ peaks[, 2]]: index of peaks in(1~100)
t_peaks <- t[peaks[, 2]] 

y_peaks <- peaks[, 1]

#
plot(t, y_sim, type = "l", col = "blue", main = "Simulated Data with Peaks", xlab = "Time", ylab = "Simulated Values")
points(t_peaks, y_peaks, col = "red", pch = 19)

## Finding initial parameters:A and gamma for simulated data

# transform into log
log_y_peaks <- log(y_peaks)

# logy = -gamma*t + logA
fit_linear <- lm(log_y_peaks ~ t_peaks) 

summary(fit_linear)
## 
## Call:
## lm(formula = log_y_peaks ~ t_peaks)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.096938 -0.017879 -0.002984  0.026874  0.049729 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.876027   0.019252  357.16   <2e-16 ***
## t_peaks     -0.022952   0.000335  -68.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03923 on 13 degrees of freedom
## Multiple R-squared:  0.9972, Adjusted R-squared:  0.997 
## F-statistic:  4694 on 1 and 13 DF,  p-value: < 2.2e-16
print("coef:")
## [1] "coef:"
coef(fit_linear)
## (Intercept)     t_peaks 
##  6.87602738 -0.02295209
coef(fit_linear)[1]
## (Intercept) 
##    6.876027
print("coef:")
## [1] "coef:"
# get log(A)  and  gamma
log_A <- coef(fit_linear)[1]

init_gamma <- -coef(fit_linear)[2] #Extract gamma from the linear fit

cat("log(A) =", log_A, "\n")
## log(A) = 6.876027
cat("gamma =", init_gamma, "\n")
## gamma = 0.02295209
init_A=exp(log_A)
print(init_A)
## (Intercept) 
##    968.7701

to see whether the initial params A and gamma is fitted or not

#Plot the simulated data
plot(t, y_sim, type = "l", col = "blue", main = "Simulated Data with Fitted Exponential", xlab = "Time", ylab = "Simulated Values")

# Calculate the fitted exponential curve using estimated A and gamma from the linear model
init_A <- exp(log_A)  # Extract log_A from the linear model

# Add the fitted exponential curve
curve(init_A * exp(-init_gamma * x), from = 0, to = max(t), col = "green", add = TRUE, lwd = 2)
#  Add a legend 
legend("topright", legend = c("Simulated Data", "Fitted Exponential"), col = c("blue", "green"), lty = 1)

find initial value:omega

# Extract the time points of the first two consecutive maxima from t_peaks
t_max1 <- t_peaks[1]  # First maximum
t_max2 <- t_peaks[2]  # Second maximum
t_max3 <- t_peaks[3] 
t_max4 <- t_peaks[4] 
# Calculate the time difference between two consecutive maxima
Delta <- t_max2 - t_max1
Delta 
## [1] 6.7
print(t_max3-t_max2)
## [1] 6.9
print(t_max4 - t_max3)
## [1] 7
# Estimate omega (tau) using the formula 2*pi/Delta
init_omega <- 2 * pi / Delta

# Output the result
cat("initial omega (tau) =", init_omega, "\n")
## initial omega (tau) = 0.9377889
cat("Time difference between consecutive maxima (Delta) =", Delta, "\n")
## Time difference between consecutive maxima (Delta) = 6.7

we assume period delta keeps same. now our initial parameters are[A = 968.7701,gamma = 0.02295209 ,omega = 0.9377889 , phi = 0, y_shift = 0]

library(nlme)
  
fit_nls <- nls(
  y_sim ~ model_function(t, A, gamma, omega, phi, y_shift),
  start = list(A = init_A, gamma = init_gamma, omega = init_omega, phi = 0.1, y_shift = 0),
  data = data.frame(t = t, y_sim = y_sim)
)
fit_nls
## Nonlinear regression model
##   model: y_sim ~ model_function(t, A, gamma, omega, phi, y_shift)
##    data: data.frame(t = t, y_sim = y_sim)
##          A      gamma      omega        phi    y_shift 
##  1.001e+03  2.503e-02  9.000e-01 -2.467e-04  1.495e-01 
##  residual sum-of-squares: 98312
## 
## Number of iterations to convergence: 8 
## Achieved convergence tolerance: 5.117e-06

method used inside vcov()

The code below manually calculates the covariance matrix of a linear regression model and verifies its consistency with the automatically computed result from the lm function.

Specifically, it first generates data and constructs the design matrix \(X\). Then, it manually computes the regression coefficients using the formula \[ \hat{\beta} = (X^T X)^{-1} X^T y. \]

Next, the covariance matrix is calculated with the formula \[ \text{Cov}(\hat{\beta}) = (X^T X)^{-1} \cdot \frac{\sum \text{resid}^2}{\text{df.res}}, \] where \(\text{resid} = y - X\hat{\beta}\) represents the residuals and \(\text{df.res}\) is the degrees of freedom, used to adjust the error variance.

Finally, all.equal is used to compare the manually computed covariance matrix with the result from vcov(mod), confirming their consistency, with the result TRUE indicating agreement.

library(data.table)
df     = data.table(y = runif(100, 0, 100),  #generates 100 random deviates. for 0<x,y,z<100
                    x = runif(100, 0, 100),
                    z = runif(100, 0, 100))
mod    = lm(y ~ ., df)
X      = cbind(1, as.matrix(df[, -1])) #only include x and z
invXtX = solve(crossprod(X)) # compute for inverse of crossproduct(X^t*X)
coef   = invXtX %*% t(X) %*% df$y # compute coef manually: coef = (X^t*X)^(-1) *X^t*y
resid  = df$y - X %*% coef # residual = real value - predict value

df.res = nrow(X) - ncol(X) #degree of freedom
manual = invXtX * sum(resid**2)/(df.res) # 
funct  = vcov(mod)

print(mod)
## 
## Call:
## lm(formula = y ~ ., data = df)
## 
## Coefficients:
## (Intercept)            x            z  
##    58.23178     -0.10573     -0.06033
all.equal(manual, funct, check.attributes = F)
## [1] TRUE

Function of simulate_and_estimate: used for repeat simulating

#install.packages("nlstools")
library(nlstools)
## 
## 'nlstools' has been loaded.
## IMPORTANT NOTICE: Most nonlinear regression models and data set examples
## related to predictive microbiolgy have been moved to the package 'nlsMicrobio'
library(stats)
simulate_and_estimate <- function(true_params, sigma0 = 10) {
  #simulate data
  t <- seq(0, 100, by = 0.1)
  model_function <- function(t, A, gamma, omega, phi, y_shift) {
    A * exp(-gamma * t) * cos(omega * t + phi) + y_shift
  }
  y_sim <- model_function(t, true_params[1], true_params[2], true_params[3], true_params[4], true_params[5]) +
    rnorm(length(t), mean = 0, sd = sigma0)

  # find initial value
  peaks <- findpeaks(y_sim, minpeakheight = 100, minpeakdistance = 15)
  t_peaks <- t[peaks[, 2]]
  y_peaks <- peaks[, 1]

  log_y_peaks <- log(y_peaks)
  fit_linear <- lm(log_y_peaks ~ t_peaks)
  log_A <- coef(fit_linear)[1]
  init_gamma <- -coef(fit_linear)[2]
  init_A <- exp(log_A)


  Delta <- t_peaks[2] - t_peaks[1]
  init_omega <- 2 * pi / Delta

    # find estimated parameters
  fit_nls <- try(nls(
    y_sim ~ model_function(t, A, gamma, omega, phi, y_shift),
    start = list(A = init_A, gamma = init_gamma, omega = init_omega, phi = 0.1, y_shift = 0),
    data = data.frame(t = t, y_sim = y_sim)
  ), silent = TRUE)

  if (class(fit_nls) == "try-error") {
    return(NULL)
  }
  # find estmate parameters and standard errors
  param_estimates <- coef(fit_nls)
  
  
  # Calculate standard errors and confidence intervals
  conf_intervals <- confint2(fit_nls, level = 0.99, method = 'asymptotic')
  return(list(estimates = param_estimates, conf_intervals = conf_intervals))
}
# test function
true_params <- c(A = 1000, gamma = 0.025, omega = 0.9, phi = 0, y_shift = 0)
result <- simulate_and_estimate(true_params)
result
## $estimates
##             A         gamma         omega           phi       y_shift 
##  9.975672e+02  2.498159e-02  9.000614e-01 -3.702137e-04  1.260699e-01 
## 
## $conf_intervals
##                 0.5 %       99.5 %
## A       993.855376026 1.001279e+03
## gamma     0.024841109 2.512208e-02
## omega     0.899920475 9.002023e-01
## phi      -0.004124713 3.384285e-03
## y_shift  -0.681001289 9.331410e-01

Task 3: repeat simulations

# simulate 100
n_simulations <- 100
results <- vector("list", n_simulations)
# 
for (i in 1:n_simulations) {
  results[[i]] <- simulate_and_estimate(true_params)
}
results <- Filter(Negate(is.null), results)
null_count <- sum(sapply(results, is.null))
cat("Number of unsuccessful fits (NULL results):", null_count, "\n")
## Number of unsuccessful fits (NULL results): 0
# Initialize coverage counters, one counter for each parameter
coverage_counts <- rep(0, length(true_params))

# Iterate through each simulation result to check coverage.
for (i in 1:length(results)) {
  ci <- results[[i]]$conf_intervals
  for (j in 1:length(true_params)) {
    # Check if it falls within the confidence interval.
    if (true_params[j] >= ci[j, 1] && true_params[j] <= ci[j, 2]) {
      coverage_counts[j] <- coverage_counts[j] + 1
    }
  }
}
#calculate coverage
coverage_proportions <- coverage_counts / n_simulations
# print unsuccessful fits


cat("Coverage for A:", coverage_proportions[1], "\n")
## Coverage for A: 0.88
cat("Coverage for gamma:", coverage_proportions[2], "\n")
## Coverage for gamma: 0.87
cat("Coverage for omega:", coverage_proportions[3], "\n")
## Coverage for omega: 0.87
cat("Coverage for phi:", coverage_proportions[4], "\n")
## Coverage for phi: 0.87
cat("Coverage for y_shift:", coverage_proportions[5], "\n")
## Coverage for y_shift: 0.85

boxplot of estimated parames for 100 simulations

library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()     masks data.table::between()
## ✖ dplyr::collapse()    masks nlme::collapse()
## ✖ purrr::cross()       masks pracma::cross()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::first()       masks data.table::first()
## ✖ lubridate::hour()    masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ dplyr::last()        masks data.table::last()
## ✖ lubridate::mday()    masks data.table::mday()
## ✖ lubridate::minute()  masks data.table::minute()
## ✖ lubridate::month()   masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second()  masks data.table::second()
## ✖ purrr::transpose()   masks data.table::transpose()
## ✖ lubridate::wday()    masks data.table::wday()
## ✖ lubridate::week()    masks data.table::week()
## ✖ lubridate::yday()    masks data.table::yday()
## ✖ lubridate::year()    masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tidyr)

# Extract valid results (remove NULL values);
valid_results <- Filter(Negate(is.null), results)

# Extract parameter estimates into a data frame
estimate_data <- do.call(rbind, lapply(valid_results, function(res) {
  as.data.frame(t(res$estimates))
}))


colnames(estimate_data) <- c("A", "gamma", "omega", "phi", "y_shift")

estimate_long <- estimate_data %>%
  pivot_longer(cols = everything(), names_to = "Parameter", values_to = "Estimate")

#install.packages("gridExtra")
library(gridExtra) # arange multiple plots
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
# Create separate boxplots for each parameter with suitable y-axis ranges;
plot_A <- ggplot(estimate_long %>% filter(Parameter == "A"), 
                 aes(x = Parameter, y = Estimate)) +
  geom_boxplot(fill = "skyblue", color = "darkblue") +
  geom_hline(yintercept = true_params["A"], linetype = "dashed") +
  labs(title = "A", y = "Estimate") +
  theme_minimal() +
  ylim(995, 1005)

plot_gamma <- ggplot(estimate_long %>% filter(Parameter == "gamma"), 
                     aes(x = Parameter, y = Estimate)) +
  geom_boxplot(fill = "lightgreen", color = "darkgreen") +
  geom_hline(yintercept = true_params["gamma"], linetype = "dashed") +
  labs(title = "gamma", y = "Estimate") +
  theme_minimal() +
  ylim(0.0248, 0.02515)

plot_omega <- ggplot(estimate_long %>% filter(Parameter == "omega"), 
                     aes(x = Parameter, y = Estimate)) +
  geom_boxplot(fill = "lightblue", color = "blue") +
  geom_hline(yintercept = true_params["omega"], linetype = "dashed") +
  labs(title = "omega", y = "Estimate") +
  theme_minimal() +
  ylim(0.89975, 0.90025)

plot_phi <- ggplot(estimate_long %>% filter(Parameter == "phi"), 
                   aes(x = Parameter, y = Estimate)) +
  geom_boxplot(fill = "pink", color = "red") +
  geom_hline(yintercept = true_params["phi"], linetype = "dashed") +
  labs(title = "phi", y = "Estimate") +
  theme_minimal() +
  ylim(min(estimate_data$phi) * 0.9, max(estimate_data$phi) * 1.1)

plot_y_shift <- ggplot(estimate_long %>% filter(Parameter == "y_shift"), 
                       aes(x = Parameter, y = Estimate)) +
  geom_boxplot(fill = "yellow", color = "orange") +
  geom_hline(yintercept = true_params["y_shift"], linetype = "dashed") +
  labs(title = "y_shift", y = "Estimate") +
  theme_minimal() +
  ylim(min(estimate_data$y_shift) * 0.9, max(estimate_data$y_shift) * 1.1)

# Arrange all subplots into a single large plot.
grid.arrange(plot_A, plot_gamma, plot_omega, plot_phi, plot_y_shift, ncol = 3)
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

This plot indicates that most of the estimated parameters obtained from 100 simulations are close to the true parameters.True parameters are indicated as blacked dashed line in above plot.

Task 4

finding initial parameters on asymmetric osillator model(on initial meeting file)

time <- df_MC$Time
X1 <- df_MC$X1
# FInd X1 valleys and peaks
peaks <- findpeaks(X1, minpeakheight = 600, minpeakdistance = 20)
peaks<-peaks[-6, ]
t_peaks <- time[peaks[, 2]]
y_peaks<- peaks[, 1]
#delete outlier

valleys <- findpeaks(-X1, minpeakheight = -500, minpeakdistance = 50)

t_valleys <- time[valleys[, 2]]
y_valleys <- -valleys[, 1]  # restore 

# 
df_peaks <- data.frame(Time = t_peaks, Value = y_peaks)
df_valleys <- data.frame(Time = t_valleys, Value = y_valleys)


ggplot(df_MC, aes(x = Time, y = X1)) +
  geom_line(color = "blue") +
  geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", size = 2, shape = 17) +  
  geom_point(data = df_valleys, aes(x = Time, y = Value), color = "green", size = 2, shape = 18) + 
  labs(title = "X1 Data with Peaks and Valleys",
       x = "Time",
       y = "X1(t)") +
  theme_minimal()

log_y_peaks <- log(y_peaks)
log_y_valleys <- log(y_valleys)

fit_peaks <- lm(log_y_peaks ~ t_peaks)
fit_valleys <- lm(log_y_valleys ~ t_valleys)

eta_max <- coef(fit_peaks)[2]  # η_max
eta_min <- coef(fit_valleys)[2]  # η_min

# estimate A_max and A_min
A_max <- exp(coef(fit_peaks)[1])
A_min <- exp(coef(fit_valleys)[1])


Delta <- t_peaks[2] - t_peaks[1]
tau <- 2 * pi / Delta

#assume phi = 0
phi <- 0

# print
cat("Initial A_max:", A_max, "\n")
## Initial A_max: 2428.445
cat("Initial A_min:", A_min, "\n")
## Initial A_min: 372.6489
cat("Initial eta_max:", eta_max, "\n")-
cat("Initial eta_min:", eta_min, "\n")
## Initial eta_max: -0.002618547 
## Initial eta_min: -0.001868607
## integer(0)
cat("Initial tau:", tau, "\n")
## Initial tau: 0.2564565
cat("Initial phi:", phi, "\n")
## Initial phi: 0

plot fitted curve to compare

max_exp_curve <- A_max * exp(eta_max * time)
min_exp_curve <- A_min * exp(eta_min * time)
# 
df_peaks <- data.frame(Time = t_peaks, Value = y_peaks)
df_valleys <- data.frame(Time = t_valleys, Value = y_valleys)
df_max_exp_curve <- data.frame(Time = time, ExponentialCurve = max_exp_curve)
df_min_exp_curve <- data.frame(Time = time, ExponentialCurve = min_exp_curve)
# 
ggplot(df_MC, aes(x = Time, y = X1)) +
  geom_line(color = "blue", size = 1) +  # 
  geom_line(data = df_max_exp_curve, aes(x = Time, y = ExponentialCurve), color = "purple",  linewidth = 1, linetype = "dashed") +  # expontential curve
  geom_line(data = df_min_exp_curve, aes(x = Time, y = ExponentialCurve), color = "yellow",  linewidth = 1, linetype = "dashed") +  # expontential curve
  geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", linewidth = 2, shape = 17) +  #
  geom_point(data = df_valleys, aes(x = Time, y = Value), color = "green", linewidth = 2, shape = 18) +
  labs(title = "X1 Data with Exponential Curve Fitting",
       x = "Time",
       y = "X1(t)") +
  theme_minimal() +
  scale_y_continuous(limits = c(0, max(X1) * 1.1))  #
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red",
## : Ignoring unknown parameters: `linewidth`
## Warning in geom_point(data = df_valleys, aes(x = Time, y = Value), color =
## "green", : Ignoring unknown parameters: `linewidth`

using lm(),I cannot get a good fit, so I try to use nls to find initial A and gamma:

nls_fit <- nls(y_peaks ~ A_max * exp(eta_max * t_peaks), 
               start = list(A_max = max(y_peaks), eta_max = -0.01), 
               control = nls.control(maxiter = 100))


A_max <- coef(nls_fit)["A_max"]
eta_max <- coef(nls_fit)["eta_max"]

# generate curve
exp_curve <- A_max * exp(eta_max * time)


df_exp_curve <- data.frame(Time = time, ExponentialCurve = exp_curve)
ggplot(df_MC, aes(x = Time, y = X1)) +
  geom_line(color = "blue", size = 1) +
  geom_line(data = df_exp_curve, aes(x = Time, y = ExponentialCurve), color = "purple", size = 1, linetype = "dashed") +
  geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", size = 2, shape = 17) +
  labs(title = "X1 Data with Improved Exponential Curve Fitting",
       x = "Time",
       y = "X1(t)") +
  theme_minimal() +
  scale_y_continuous(limits = c(0, max(X1) * 1.1))

still not good fit,

try to fit in 2 segments time before 100 and time after 100

#  split data for early(t<100) and late segments(t>=100)
time_early <- time[time < 100]
time_late <- time[time >= 100]

y_peaks_early <- y_peaks[t_peaks < 100]
t_peaks_early <- t_peaks[t_peaks < 100]
y_valleys_early <- y_valleys[t_valleys < 100]
t_valleys_early <- t_valleys[t_valleys < 100]

y_peaks_late <- y_peaks[t_peaks >= 100]
t_peaks_late <- t_peaks[t_peaks >= 100]
y_valleys_late <- y_valleys[t_valleys >= 100]
t_valleys_late <- t_valleys[t_valleys >= 100]

# exponential fit when t <100
log_y_peaks_early <- log(y_peaks_early)
log_y_valleys_early <- log(y_valleys_early)

fit_peaks_early <- lm(log_y_peaks_early ~ t_peaks_early)
fit_valleys_early <- lm(log_y_valleys_early ~ t_valleys_early)

A_max_early <- exp(coef(fit_peaks_early)[1])
eta_max_early <- -coef(fit_peaks_early)[2]
A_min_early <- exp(coef(fit_valleys_early)[1])
eta_min_early <- -coef(fit_valleys_early)[2]

# exponential fit when t > 100
log_y_peaks_late <- log(y_peaks_late)
log_y_valleys_late <- log(y_valleys_late)

fit_peaks_late <- lm(log_y_peaks_late ~ t_peaks_late)
fit_valleys_late <- lm(log_y_valleys_late ~ t_valleys_late)

A_max_late <- exp(coef(fit_peaks_late)[1])
eta_max_late <- -coef(fit_peaks_late)[2]
A_min_late <- exp(coef(fit_valleys_late)[1])
eta_min_late <- -coef(fit_valleys_late)[2]

#plot curve to test

# Generate exponential curves for early and late segments
max_exp_curve_early <- A_max_early * exp(-eta_max_early * time_early)
min_exp_curve_early <- A_min_early * exp(-eta_min_early * time_early)

max_exp_curve_late <- A_max_late * exp(-eta_max_late * time_late)
min_exp_curve_late <- A_min_late * exp(-eta_min_late * time_late)

# Convert data to data frames for ggplot
df_exp_curve_early <- data.frame(Time = time_early, MaxExpCurve = max_exp_curve_early, MinExpCurve = min_exp_curve_early)
df_exp_curve_late <- data.frame(Time = time_late, MaxExpCurve = max_exp_curve_late, MinExpCurve = min_exp_curve_late)

# Peaks and valleys of the original data
df_peaks <- data.frame(Time = t_peaks, Value = y_peaks)
df_valleys <- data.frame(Time = t_valleys, Value = y_valleys)

# Plot the original data and the fitted exponential curves
ggplot(df_MC, aes(x = Time, y = X1)) +
  geom_line(color = "blue", size = 1) +  # original data
  geom_line(data = df_exp_curve_early, aes(x = Time, y = MaxExpCurve), color = "purple", size = 1, linetype = "dashed") +  # Early segment max exponential curve
  geom_line(data = df_exp_curve_early, aes(x = Time, y = MinExpCurve), color = "yellow", size = 1, linetype = "dashed") +  # Early segment min exponential curve
  geom_line(data = df_exp_curve_late, aes(x = Time, y = MaxExpCurve), color = "purple", size = 1, linetype = "dotted") +  # late segment min exponential curve
  geom_line(data = df_exp_curve_late, aes(x = Time, y = MinExpCurve), color = "yellow", size = 1, linetype = "dotted") +  # late segment min exponential curve
  geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", size = 2, shape = 17) +  # Red points marking peaks
  geom_point(data = df_valleys, aes(x = Time, y = Value), color = "green", size = 2, shape = 18) +  # Green points marking valleys
  labs(title = "X1 Data with Early and Late Exponential Curve Fitting",
       x = "Time",
       y = "X1(t)") +
  theme_minimal() +
  scale_y_continuous(limits = c(0, max(X1) * 1.1))  # Adjust y-axis range to better display data

now calculate omega for time <100 and time>=100

# For time < 100
# Calculate Delta and omega for early segment (time < 100)
Delta_early <- t_peaks_early[2] - t_peaks_early[1]  # Assuming the first two peaks are used
omega_early <- 2 * pi / Delta_early

# For time >= 100
# Calculate Delta and omega for late segment (time >= 100)
Delta_late <- t_peaks_late[2] - t_peaks_late[1]  # Assuming the first two peaks in the late segment are used
omega_late <- 2 * pi / Delta_late

Create a chart to show parameters

#install.packages("knitr") # Uncomment this line if knitr is not installed
#install.packages("DT")    # Uncomment this line if DT is not installed
library(knitr)
library(DT)

# Create a data frame
result_table <- data.frame(
  Parameter = c("A_max", "eta_max", "A_min", "eta_min", "Delta", "omega"),
  `time < 100` = c(A_max_early, eta_max_early, A_min_early, eta_min_early, Delta_early, omega_early),
  `time >= 100` = c(A_max_late, eta_max_late, A_min_late, eta_min_late, Delta_late, omega_late)
)

# Display the table
# Method 1: Use knitr::kable to show a static table
kable(result_table, caption = "Comparison Table for Time < 100 and Time >= 100")
Comparison Table for Time < 100 and Time >= 100
Parameter time…100 time….100
A_max 2903.0532824 2159.5386566
eta_max 0.0049023 0.0022853
A_min 414.3190899 334.5334712
eta_min 0.0026202 0.0015619
Delta 24.5000000 25.6000000
omega 0.2564565 0.2454369
datatable(result_table, caption = "Comparison Table for Time < 100 and Time >= 100")

Find phi for assymmetric model

# when t = 1:
A_max_1 <- A_max_early * exp(-eta_max_early * 1)
A_min_1 <- A_min_early * exp(-eta_min_early * 1)

# the value of f(1)
f1 <- df_MC$X1[df_MC$Time == 1]

# phi
cos_omega_phi <- (f1 - 0.5 * (A_max_1 + A_min_1)) / (0.5 * (A_max_1 - A_min_1))
phi_initial <- acos(cos_omega_phi) - omega_early  

# 
print(phi_initial)
## (Intercept) 
##    2.427676
# when t = 102
A_max_102 <- A_max_late * exp(-eta_max_late * 102)
A_min_102 <- A_min_late * exp(-eta_min_late * 102)

# the value of f(1)
f102 <- df_MC$X1[df_MC$Time == 102]

# phi
cos_omega_phi102 <- (f1 - 0.5 * (A_max_102 + A_min_102)) / (0.5 * (A_max_102 - A_min_102))
phi_102 <- acos(cos_omega_phi102) - omega_late  

# 
print(phi_102)
## (Intercept) 
##    2.022232
#initial meeting model function
asym_model_function <- function(t, A_max, eta_max, A_min, eta_min, omega, phi) {
  A_max_t <- A_max * exp(-eta_max * t)
  A_min_t <- A_min * exp(-eta_min * t)
  result <- 0.5 * (A_max_t + A_min_t) + 0.5 * (A_max_t - A_min_t) * cos(omega * t + phi)
  return(result)
}
true_params_asym = c(A_max =2000 , eta_max = 0.005, A_min =200, eta_min = 0.0025,omega = 0.7, phi = 0)

simulate_asymm_func<- function(t, params, sigma) {
  predicted <- asym_model_function(t, params[1], params[2], params[3], params[4], params[5],params[6])
  noise <- rnorm(length(t), mean = 0, sd = sigma)  #using sigma0 to produce noise 
  simulated_data <- predicted + noise
  return(simulated_data)
}

sigma0 = 10
t <- seq(0, 100, by = 0.1)
simulated_asymm_data <- simulate_asymm_func(t, true_params_asym, sigma0 )
plot(t,simulated_asymm_data,type = "l", col = "blue")

peaks <- findpeaks(simulated_asymm_data, minpeakheight = 600, minpeakdistance = 20)

t_peaks <- time[peaks[, 2]]
y_peaks<- peaks[, 1]
#delete outlier

valleys <- findpeaks(-simulated_asymm_data, minpeakheight = -500, minpeakdistance = 50)

t_valleys <- time[valleys[, 2]]
y_valleys <- -valleys[, 1]  # restore 

df_simulated <- data.frame(Time = t, X1 = simulated_asymm_data)

# Create data frames for peaks and valleys
df_peaks <- data.frame(Time = t_peaks, Value = y_peaks)
df_valleys <- data.frame(Time = t_valleys, Value = y_valleys)

# Plotting using ggplot2

ggplot(df_simulated, aes(x = Time, y = X1)) +
  geom_line(color = "blue") +  # Main line for simulated data
  geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", size = 2, shape = 17) +  # Peaks in red
  geom_point(data = df_valleys, aes(x = Time, y = Value), color = "green", size = 2, shape = 18) +  # Valleys in green
  labs(
    title = "X1 Data with Peaks and Valleys",
    x = "Time",
    y = "X1(t)"
  ) +
  theme_minimal()

# exponential fit when t <100
log_y_peaks <- log(y_peaks)
log_y_valleys <- log(y_valleys)

fit_peaks <- lm(log_y_peaks ~ t_peaks)
fit_valleys <- lm(log_y_valleys ~ t_valleys)

A_max_asym_sim <- exp(coef(fit_peaks)[1])
eta_max_asym_sim <- -coef(fit_peaks)[2]
A_min_asym_sim <- exp(coef(fit_valleys)[1])
eta_min_asym_sim <- -coef(fit_valleys)[2]

Delta_asym_sim <- t_peaks[2] - t_peaks[1]  # Assuming the first two peaks are used
omega_asym_sim <- 2 * pi / Delta_asym_sim
initial_params_asymm_sim =c(A_max_asym_sim,eta_max_asym_sim,A_min_asym_sim,eta_min_asym_sim, omega_asym_sim, phi = 0)
print(initial_params_asymm_sim)
##  (Intercept)      t_peaks  (Intercept)    t_valleys                       phi 
## 2.055468e+03 6.041656e-03 1.908841e+02 2.462641e-03 7.391983e-01 0.000000e+00
cat("true_params_asym: ", true_params_asym)
## true_params_asym:  2000 0.005 200 0.0025 0.7 0
# Define initial parameter values calculated from the peaks and valleys
initial_params_asymm_sim <- list(
  A_max = A_max_asym_sim,
  eta_max = eta_max_asym_sim,
  A_min = A_min_asym_sim,
  eta_min = eta_min_asym_sim,
  omega = omega_asym_sim,
  phi = 0
)

fit_nls <- nlsLM(
  simulated_asymm_data ~ asym_model_function(t, A_max, eta_max, A_min, eta_min, omega, phi),
  start = initial_params_asymm_sim,
  data = data.frame(t = t, simulated_asymm_data = simulated_asymm_data)
)
# Display the summary of the fitted model
summary(fit_nls)
## 
## Formula: simulated_asymm_data ~ asym_model_function(t, A_max, eta_max, 
##     A_min, eta_min, omega, phi)
## 
## Parameters:
##                    Estimate Std. Error   t value Pr(>|t|)    
## A_max.(Intercept) 2.000e+03  1.246e+00  1605.533   <2e-16 ***
## eta_max.t_peaks   4.992e-03  1.215e-05   410.980   <2e-16 ***
## A_min.(Intercept) 2.000e+02  1.198e+00   166.943   <2e-16 ***
## eta_min.t_valleys 2.497e-03  1.119e-04    22.314   <2e-16 ***
## omega             7.000e-01  2.329e-05 30048.486   <2e-16 ***
## phi               1.281e-03  1.159e-03     1.105     0.27    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.22 on 995 degrees of freedom
## 
## Number of iterations to convergence: 14 
## Achieved convergence tolerance: 1.49e-08

Repeat Simulating for model in initial meeting

:I found that the model is not fitting(as initial parameters = estimated parameters)

true_params_asym = c(A_max =2000 , eta_max = 0.005, A_min =200, eta_min = 0.0025,omega = 0.7, phi = 0)

coverage_results <- list(
  A_max = 0, eta_max = 0, A_min = 0, eta_min = 0, omega = 0, phi = 0
)


num_simulations <- 2
confidence_level <- 0.99
z_score <- qnorm((1 + confidence_level) / 2)  
sigma0=1

for (i in 1:num_simulations) {
  
  simulated_data <- simulate_asymm_func(t, true_params_asym, sigma0)
  
  
  peaks <- findpeaks(simulated_data, minpeakheight = 600, minpeakdistance = 20)

  t_peaks <- t[peaks[, 2]]
  y_peaks <- peaks[, 1]
  
  valleys <- findpeaks(-simulated_data, minpeakheight = -500, minpeakdistance = 50)

  t_valleys <- t[valleys[, 2]]
  y_valleys <- -valleys[, 1]
  
  fit_peaks <- lm(log(y_peaks) ~ t_peaks)
  fit_valleys <- lm(log(y_valleys) ~ t_valleys)
  
  A_max_sim <- exp(coef(fit_peaks)[1])
  eta_max_sim <- -coef(fit_peaks)[2]
  A_min_sim <- exp(coef(fit_valleys)[1])
  eta_min_sim <- -coef(fit_valleys)[2]
  Delta_sim <- mean(diff(t_peaks[1:2]))
  omega_sim <- 2 * pi / Delta_sim
  
  initial_params <- list(
    A_max = A_max_sim, eta_max = eta_max_sim, 
    A_min = A_min_sim, eta_min = eta_min_sim, 
    omega = omega_sim, phi = 0
  )
  print(paste("Iteration:", i, "Initial Params:", initial_params))
 
  fit_nls <- nls2(
        simulated_data ~ asym_model_function(t, A_max, eta_max, A_min, eta_min, omega, phi),
        start = initial_params, 
        algorithm = "brute-force",
        data = data.frame(t = t, simulated_data = simulated_data)
      )
  
  estimated_params <- coef(fit_nls)  
  print(paste("Iteration:", i, "Estimated Params:", estimated_params))
  param_se <- summary(fit_nls)$coefficients[, "Std. Error"]  
  
  conf_intervals <- confint2(fit_nls, level = 0.99, method = 'asymptotic')
  
 
  for (param in names(coverage_results)) {
      
      true_value <- true_params_asym[[param]]
      lower_bound <- conf_intervals[param, 1]
      upper_bound <- conf_intervals[param, 2]
      
     
      if (true_value >= lower_bound && true_value <= upper_bound) {
        coverage_results[[param]] <- coverage_results[[param]] + 1
      }
    }
}
## [1] "Iteration: 1 Initial Params: c(`(Intercept)` = 2001.22842936914)"
## [2] "Iteration: 1 Initial Params: c(t_peaks = 0.00500879240684368)"   
## [3] "Iteration: 1 Initial Params: c(`(Intercept)` = 199.922583506611)"
## [4] "Iteration: 1 Initial Params: c(t_valleys = 0.0025395218015428)"  
## [5] "Iteration: 1 Initial Params: 0.705975877211189"                  
## [6] "Iteration: 1 Initial Params: 0"                                  
## [1] "Iteration: 1 Estimated Params: 2001.22842936914"   
## [2] "Iteration: 1 Estimated Params: 0.00500879240684368"
## [3] "Iteration: 1 Estimated Params: 199.922583506611"   
## [4] "Iteration: 1 Estimated Params: 0.0025395218015428" 
## [5] "Iteration: 1 Estimated Params: 0.705975877211189"  
## [6] "Iteration: 1 Estimated Params: 0"                  
## [1] "Iteration: 2 Initial Params: c(`(Intercept)` = 2000.92363964796)"
## [2] "Iteration: 2 Initial Params: c(t_peaks = 0.00500051344975148)"   
## [3] "Iteration: 2 Initial Params: c(`(Intercept)` = 199.742776469103)"
## [4] "Iteration: 2 Initial Params: c(t_valleys = 0.002501172102493)"   
## [5] "Iteration: 2 Initial Params: 0.705975877211189"                  
## [6] "Iteration: 2 Initial Params: 0"                                  
## [1] "Iteration: 2 Estimated Params: 2000.92363964796"   
## [2] "Iteration: 2 Estimated Params: 0.00500051344975148"
## [3] "Iteration: 2 Estimated Params: 199.742776469103"   
## [4] "Iteration: 2 Estimated Params: 0.002501172102493"  
## [5] "Iteration: 2 Estimated Params: 0.705975877211189"  
## [6] "Iteration: 2 Estimated Params: 0"
coverage_results <- sapply(coverage_results, function(x) x / num_simulations)
print(coverage_results)
##   A_max eta_max   A_min eta_min   omega     phi 
##       1       1       1       1       0       1
# list to store  results
coverage_results <- list(
  A_max = 0, eta_max = 0, A_min = 0, eta_min = 0, omega = 0, phi = 0
)
print(true_params_asym)
##   A_max eta_max   A_min eta_min   omega     phi 
## 2.0e+03 5.0e-03 2.0e+02 2.5e-03 7.0e-01 0.0e+00
num_simulations <- 100

sigma0 = 10

for (i in 1:num_simulations) {

  simulated_data <- simulate_asymm_func(t, true_params_asym, sigma0) 
  
  
  peaks <- findpeaks(simulated_data, minpeakheight = 600, minpeakdistance = 20)
  t_peaks <- t[peaks[, 2]]
  y_peaks <- peaks[, 1]
  
  valleys <- findpeaks(-simulated_data, minpeakheight = -500, minpeakdistance = 50)
  t_valleys <- t[valleys[, 2]]
  y_valleys <- -valleys[, 1]
  
  fit_peaks <- lm(log(y_peaks) ~ t_peaks)
  fit_valleys <- lm(log(y_valleys) ~ t_valleys)
  
  A_max_sim <- exp(coef(fit_peaks)[1])
  eta_max_sim <- -coef(fit_peaks)[2]
  A_min_sim <- exp(coef(fit_valleys)[1])
  eta_min_sim <- -coef(fit_valleys)[2]
  Delta_sim <- t_peaks[2] - t_peaks[1]
  omega_sim <- 2 * pi / Delta_sim
  
  initial_params <- list(
    A_max = A_max_sim, eta_max = eta_max_sim, 
    A_min = A_min_sim, eta_min = eta_min_sim, 
    omega = omega_sim, phi = 0
  )

  library(minpack.lm)
  fit_nls <- tryCatch(
    {
      nlsLM(
        simulated_data ~ asym_model_function(t, A_max, eta_max, A_min, eta_min, omega, phi),
        start = initial_params,
        data = data.frame(t = t, simulated_data = simulated_data)
      )
    },
    error = function(e) {
      message("Fitting failed for iteration ", i)
      return(NULL)  
    }
  )
  
 
  if (is.null(fit_nls)) next
  
  estimated_params <- coef(fit_nls)  
  # print(initial_params)
  # print(estimated_params)
  conf_intervals <- confint2(fit_nls, level = 0.99, method = 'asymptotic')
  
  rownames(conf_intervals) <- c("A_max", "eta_max", "A_min", "eta_min", "omega", "phi")
  
  # coverage
  for (param in names(coverage_results)) {
    
    true_value <- true_params_asym[[param]]
    lower_bound <- conf_intervals[param, 1]
    upper_bound <- conf_intervals[param, 2]
    
    
    if (true_value >= lower_bound && true_value <= upper_bound) {
      coverage_results[[param]] <- coverage_results[[param]] + 1
    }
  }
}

coverage_results <- sapply(coverage_results, function(x) x / num_simulations)
print(coverage_results)
##   A_max eta_max   A_min eta_min   omega     phi 
##    1.00    1.00    1.00    1.00    0.97    1.00

Model Fitting for Asymmetrical Oscillatory Model with fixed variance

In this analysis, we fit an asymmetrical oscillatory model to the McManus data(X1), using \(t = 100\) as the boundary to split the data into two parts: \(t < 100\) and \(t \geq 100\). The model is given by:

\[ y_{i,j} \sim N(f_j(t_i), \sigma_i^2), \quad i = 1, \dots, n, \text{ independently} \]

where the function \(f_j(t)\) is defined as:

\[ f_j(t) = 0.5 (A_{\text{max}}(t) + A_{\text{min}}(t)) + 0.5 (A_{\text{max}}(t) - A_{\text{min}}(t)) \cos(\tau t + \phi_j) \]

Here, \(A_{\text{max}}(t) = A_{\text{max}} \cdot \exp(-\eta_{\text{max}} \cdot t)\) and \(A_{\text{min}}(t) = A_{\text{min}} \cdot \exp(-\eta_{\text{min}} \cdot t)\) represent the decaying maximum and minimum amplitudes over time, with parameters \(A_{\text{max}}, \eta_{\text{max}}, A_{\text{min}}, \eta_{\text{min}}, \tau\) (frequency), and \(\phi_j\) (phase shift) as the model parameters to be estimated.

We use the function in R to fit this non-linear model separately for the data segments \(t < 100\) and \(t \geq 100\), using appropriate initial parameter values for each segment based on prior analysis.

Each model fit provides the estimated parameters \(A_{\text{max}}, \eta_{\text{max}}, A_{\text{min}}, \eta_{\text{min}}, \omega, \phi_j\) and their associated 99% confidence intervals.

# Split the data into two parts: time < 100 and time >= 100
df_MC_early <- subset(df_MC, Time < 100)
df_MC_late <- subset(df_MC, Time >= 100)

# Define initial parameters for the time < 100 case
initial_params_early <- list(
  A_max = A_max_early,       
  eta_max = eta_max_early,   
  A_min = A_min_early,       
  eta_min = eta_min_early,   
  omega = omega_early,       
  phi_j = phi_initial            
)

# Fit the model for time < 100
fit_nls_early <- nls(
  X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, omega, phi_j),
  start = initial_params_early,
  data = df_MC_early
)
conf_intervals_early <- confint2(fit_nls_early, level = 0.99, method = 'asymptotic')
cat("Fit results for time < 100:\n")
## Fit results for time < 100:
coef(fit_nls_early)
##         A_max       eta_max         A_min       eta_min         omega 
##  2.780759e+03  5.152686e-03  1.580878e+02 -3.823149e-03  2.600176e-01 
##         phi_j 
##  3.672187e+00
# Fit the model for time < 100 using nlsLM
fit_nlsLM_early <- tryCatch({
  nlsLM(
    X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, omega, phi_j),
    start = initial_params_early,
    data = df_MC_early
  )
}, error = function(e) {
  message("Fitting failed for the early time period")
  return(NULL)
})
estimated_early_params <- coef(fit_nlsLM_early)
names(estimated_early_params)<-c("A_max", "eta_max", "A_min", "eta_min", "omega", "phi")
# Check if fitting was successful before calculating confidence intervals
if (!is.null(fit_nlsLM_early)) {
  conf_intervals_early <- confint2(fit_nlsLM_early, level = 0.99, method = 'asymptotic')
  cat("Fit results for time < 100:\n")
  print(estimated_early_params)
  print(conf_intervals_early)
} else {
  cat("Fitting was unsuccessful for the early time period.\n")
}
## Fit results for time < 100:
##         A_max       eta_max         A_min       eta_min         omega 
##  2.780757e+03  5.152672e-03  1.580923e+02 -3.822675e-03  2.600176e-01 
##           phi 
##  3.672186e+00 
##                                 0.5 %        99.5 %
## A_max.(Intercept)        2.744675e+03  2.816839e+03
## eta_max.t_peaks_early    4.880479e-03  5.424865e-03
## A_min.(Intercept)        1.280567e+02  1.881280e+02
## eta_min.t_valleys_early -6.638467e-03 -1.006882e-03
## omega                    2.595499e-01  2.604852e-01
## phi_j.(Intercept)        3.649612e+00  3.694760e+00
# Define initial parameters for the time >= 100 case
initial_params_late <- list(
  A_max = A_max_late,       
  eta_max = eta_max_late,   
  A_min = A_min_late,       
  eta_min = eta_min_late,   
  omega = omega_late,       
  phi_j = phi_102              # Initial guess for phi_j
)

# Fit the model for time >= 100
fit_nls_late <- nls(
  X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, omega, phi_j),
  start = initial_params_late,
  data = df_MC_late
)

cat("Fit results for time >= 100:\n")
## Fit results for time >= 100:
summary(fit_nls_late)
## 
## Formula: X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, 
##     omega, phi_j)
## 
## Parameters:
##           Estimate Std. Error  t value Pr(>|t|)    
## A_max    2.143e+03  8.167e+00  262.393  < 2e-16 ***
## eta_max  2.461e-03  1.472e-05  167.216  < 2e-16 ***
## A_min    1.769e+02  5.822e+00   30.384  < 2e-16 ***
## eta_min  5.527e-04  1.069e-04    5.172 2.43e-07 ***
## omega    2.582e-01  2.971e-05 8692.729  < 2e-16 ***
## phi_j   -2.299e+00  7.514e-03 -306.002  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 65.21 on 3993 degrees of freedom
## 
## Number of iterations to convergence: 12 
## Achieved convergence tolerance: 4.672e-06
conf_intervals_late <- confint2(fit_nls_late, level = 0.99, method = 'asymptotic')
# Fit the model for time < 100 using nlsLM
fit_nlsLM_late <- tryCatch({
  nlsLM(
    X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, omega, phi_j),
    start = initial_params_late,
    data = df_MC_late
  )
}, error = function(e) {
  message("Fitting failed for the late time period")
  return(NULL)
})
estimated_late_params <-coef(fit_nlsLM_late)
# Check if fitting was successful before calculating confidence intervals
if (!is.null(fit_nlsLM_late)) {
  conf_intervals_late <- confint2(fit_nlsLM_late, level = 0.99, method = 'asymptotic')
  cat("Fit results for time < 100:\n")
  print(coef(fit_nlsLM_late))
  print(conf_intervals_late)
} else {
  cat("Fitting was unsuccessful for the late time period.\n")
}
## Fit results for time < 100:
##      A_max.(Intercept)   eta_max.t_peaks_late      A_min.(Intercept) 
##           2.142972e+03           2.460661e-03           1.769057e+02 
## eta_min.t_valleys_late                  omega      phi_j.(Intercept) 
##           5.527330e-04           2.582176e-01          -2.299163e+00 
##                                0.5 %        99.5 %
## A_max.(Intercept)       2.121925e+03  2.164019e+03
## eta_max.t_peaks_late    2.422738e-03  2.498583e-03
## A_min.(Intercept)       1.619013e+02  1.919101e+02
## eta_min.t_valleys_late  2.773166e-04  8.281494e-04
## omega                   2.581411e-01  2.582942e-01
## phi_j.(Intercept)      -2.318526e+00 -2.279800e+00
estimated_late_params
##      A_max.(Intercept)   eta_max.t_peaks_late      A_min.(Intercept) 
##           2.142972e+03           2.460661e-03           1.769057e+02 
## eta_min.t_valleys_late                  omega      phi_j.(Intercept) 
##           5.527330e-04           2.582176e-01          -2.299163e+00
names(estimated_late_params) <- c("A_max", "eta_max", "A_min", "eta_min", "omega", "phi")
estimated_late_params
##         A_max       eta_max         A_min       eta_min         omega 
##  2.142972e+03  2.460661e-03  1.769057e+02  5.527330e-04  2.582176e-01 
##           phi 
## -2.299163e+00

The code below extracts the estimated parameters (\(A_{\text{max}}\), \(\eta_{\text{max}}\), \(A_{\text{min}}\), \(\eta_{\text{min}}\), \(\omega\), \(\phi_j\)) and their corresponding 99% confidence intervals for both (time \(<\) 100) and (time \(\geq\) 100) periods. It combines these values into a single data frame () that lists each parameter with its estimate, lower bound (0.5%), and upper bound (99.5%) for both time periods. The purpose is to organize the parameter estimates and confidence intervals in a structured format, enabling easy comparison between the two time periods.

estimates_early <- estimated_early_params
lower_early <- conf_intervals_early[, 1]
upper_early <- conf_intervals_early[, 2]

estimates_late <- estimated_late_params
lower_late <- conf_intervals_late[, 1]
upper_late <- conf_intervals_late[, 2]


combined_data <- data.frame(
  Parameter = rep(names(estimates_early), 2),  
  Time_Period = rep(c("time < 100", "time >= 100"), each = length(estimates_early)),  
  Estimate = c(estimates_early, estimates_late),  
  Lower_0.5 = c(lower_early, lower_late), 
  Upper_99.5 = c(upper_early, upper_late)  
)


kable(combined_data, caption = "Estimated Parameters with 99% Confidence Intervals for Different Time Periods")
Estimated Parameters with 99% Confidence Intervals for Different Time Periods
Parameter Time_Period Estimate Lower_0.5 Upper_99.5
A_max time < 100 2780.7568895 2744.6748190 2816.8389599
eta_max time < 100 0.0051527 0.0048805 0.0054249
A_min time < 100 158.0923415 128.0567310 188.1279519
eta_min time < 100 -0.0038227 -0.0066385 -0.0010069
omega time < 100 0.2600176 0.2595499 0.2604852
phi time < 100 3.6721860 3.6496118 3.6947601
A_max time >= 100 2142.9718996 2121.9249532 2164.0188460
eta_max time >= 100 0.0024607 0.0024227 0.0024986
A_min time >= 100 176.9056728 161.9012762 191.9100693
eta_min time >= 100 0.0005527 0.0002773 0.0008281
omega time >= 100 0.2582176 0.2581411 0.2582942
phi time >= 100 -2.2991626 -2.3185255 -2.2797998

I uses the estimated parameters for the \(<\) 100 and \(\geq\) 100 periods to generate predicted values using the . The original data points (in black) for both periods are overlaid with the model’s predicted curves (in blue for and red for ). The objective is to visually assess how well the model’s predictions (based on estimated parameters) match the original data for both periods.

df_MC_early$predicted <- asym_model_function(df_MC_early$Time, estimated_early_params["A_max"], estimated_early_params["eta_max"], 
                                             estimated_early_params["A_min"], estimated_early_params["eta_min"], 
                                             estimated_early_params["omega"], estimated_early_params["phi"])

df_MC_late$predicted <- asym_model_function(df_MC_late$Time, 
                    estimated_late_params["A_max"], estimated_late_params["eta_max"], estimated_late_params["A_min"], estimated_late_params["eta_min"], 
              estimated_late_params["omega"], estimated_late_params["phi"])


ggplot() +
 
  geom_point(data = df_MC_early, aes(x = Time, y = X1), color = "black", size = 0.3) +
  geom_line(data = df_MC_early, aes(x = Time, y = predicted), color = "blue", size = 0.5) +
 
  geom_point(data = df_MC_late, aes(x = Time, y = X1), color = "black", size = 0.3) +
  geom_line(data = df_MC_late, aes(x = Time, y = predicted), color = "red", size = 0.5) +
 
  labs(title = "Comparison of Original Data and Model Fit",
       x = "Time",
       y = "X1") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

Nonlinear Least Squares Model Fitting with Weights ## time specific function

# Define the model function
model_function <- function(t, A, gamma, omega, phi, y_shift) {
  A * exp(-gamma * t ) * cos(omega * t + phi) + y_shift
}
# Function to calculate variance at each time point
calculate_variance <- function(replicates) {
  n <- nrow(replicates)
  mean_vals <- rowMeans(replicates)  # Mean for each time point
  variance <- rowSums((replicates - mean_vals)^2) / (n - 1)  # Variance formula
  return(variance)
}
# Weighted nonlinear least squares estimation
weighted_nls <- function(t, replicates, initial_params) {
  variances <- calculate_variance(replicates)  # Variance at each time point

  # Fit the model using nlsLM
  fit <- nlsLM(y ~ model_function(t, A, gamma, omega, phi, y_shift),
               start = initial_params,
               data = list(t = t, y = rowMeans(replicates)),
               weights = 1 / variances)  # Weight by the inverse of variances
  return(fit)
}
# Use data from replicates for fitting
replicates <- df_MC[, c("X1", "X2", "X3", "X4")]
# Calculate variance
observed_variances <- calculate_variance(replicates)
hist(observed_variances, main = "Histogram of Variances",
     xlab = "Variance",
     ylab = "Frequency",
     col = "lightblue",
     border = "black")

bayesian inference for variance

Suppose we have a sample \(X_1, X_2, \dots, X_n\) drawn from a normal distribution \(\mathcal{N}(\mu, \sigma^2)\), with an observed sample variance \(S_i^2\). Then, the distribution of the observed sample variance is: \[ \frac{(n-1) S_i^2}{\sigma_i^2} \sim \chi^2_{n-1} \] where \(\chi^2_{n-1}\) denotes a chi-squared distribution with \(n-1\) degrees of freedom.

Therefore, given \(\sigma_i^2\), the probability density function (likelihood function) of the sample variance \(S_i^2\) can be written as: \[ P(S_i^2 | \sigma_i^2) \propto \left( \frac{1}{\sigma_i^2} \right)^{(n-1)/2} \exp \left( -\frac{(n-1) S_i^2}{2 \sigma_i^2} \right) \]

For simplicity, we assume the inverse of the variance \(\sigma_i^{-2}\) follows a Gamma distribution: \[ \sigma_i^{-2} \sim \text{Gamma}(\alpha_{\text{prior}}, \beta_{\text{prior}}) \] The probability density function of the Gamma distribution is: \[ P(\sigma_i^{-2}) \propto (\sigma_i^{-2})^{\alpha_{\text{prior}} - 1} \exp(-\beta_{\text{prior}} \cdot \sigma_i^{-2}) \]

According to Bayes’ theorem, the posterior distribution \(P(\sigma_i^{-2} | S_i^2)\) is proportional to the product of the prior distribution and the likelihood function: \[ P(\sigma_i^{-2} | S_i^2) \propto P(S_i^2 | \sigma_i^2) \cdot P(\sigma_i^{-2}) \] Substituting the likelihood function and prior distribution, we have: \[ P(\sigma_i^{-2} | S_i^2) \propto (\sigma_i^{-2})^{(n-1)/2} \exp \left( -\frac{(n-1) S_i^2}{2} \sigma_i^{-2} \right) \cdot (\sigma_i^{-2})^{\alpha_{\text{prior}} - 1} \exp(-\beta_{\text{prior}} \cdot \sigma_i^{-2}) \] Combining the exponents, we get: \[ P(\sigma_i^{-2} | S_i^2) \propto (\sigma_i^{-2})^{\alpha_{\text{prior}} + (n-1)/2 - 1} \exp \left( -\left( \beta_{\text{prior}} + \frac{(n-1) S_i^2}{2} \right) \sigma_i^{-2} \right) \]

We observe that this expression still has the form of a Gamma distribution. Therefore, the posterior distribution \(P(\sigma_i^{-2} | S_i^2)\) is a Gamma distribution, with posterior parameters:

The mean of a Gamma distribution is given by \(\frac{\text{shape}}{\text{scale}}\), so the mean of the posterior distribution (used as an estimate of the inverse variance) is: \[ \text{Posterior Mean} = \frac{\alpha_{\text{post}}}{\beta_{\text{post}}} \]

# # Define the Bayesian inference function
bayesian_variance_estimation <- function(observed_variances, n, alpha_prior = 1, beta_prior = 1) {
  # Input parameters:
  # - observed_variances: a vector of observed variances
  # - n: the number of samples at each time point
  # - alpha_prior, beta_prior: parameters for the Gamma prior distribution
  # Create a vector to store the estimated weight
  weights <- numeric(length(observed_variances))
  
  # # Iterate over each observed variance and perform Bayesian inference
  for (i in 1:length(observed_variances)) {
    S2_i <- observed_variances[i]
    
    alpha_post <- alpha_prior + (n - 1) / 2 # Update the posterior parameters
    beta_post <- beta_prior + S2_i * (n - 1) / 2 
    posterior_mean <- alpha_post / beta_post  # Calculate the mean of the posterior distribution 
                                              #  (mean of a Gamma distribution is shape/scale)
    weights[i] <- posterior_mean  # Use the posterior mean as the reliable weight
  }
  return(weights)
}
# # Set prior parameters (can be adjusted as needed)
alpha_prior <- 2.5  # Gamma: shape
beta_prior <- 7   # Gamma: scale
# Number of samples
n <- ncol(replicates)
# Calculate reliable weights using Bayesian inference
weights <- bayesian_variance_estimation(observed_variances, n, alpha_prior, beta_prior)
# Plot a histogram of the adjusted Bayesian weights
hist(weights, main = "Histogram of Bayesian Adjusted Weights",
     xlab = "Adjusted Weight", ylab = "Frequency", col = "lightblue", border = "black")

Estimate prior parameters

MLE

Assume \(\sigma_i^{-2} \sim \text{Gamma}(\alpha, \beta)\), where \(\alpha\) is the shape parameter and \(\beta\) is the scale parameter. The probability density function of the Gamma distribution is given by: \[ f(x; \alpha, \beta) = \frac{\beta^\alpha}{\Gamma(\alpha)} x^{\alpha - 1} e^{-\beta x} \] where \(x = \sigma_i^{-2}\), and \(x > 0\).

Assume we have \(n\) observed values \(x_1, x_2, \ldots, x_n\) (where \(x_i = \sigma_i^{-2}\)). The likelihood function \(L(\alpha, \beta)\) for these observations is: \[ L(\alpha, \beta) = \prod_{i=1}^n f(x_i; \alpha, \beta) = \prod_{i=1}^n \frac{\beta^\alpha}{\Gamma(\alpha)} x_i^{\alpha - 1} e^{-\beta x_i} \]

For simplicity, we take the log of the likelihood function to obtain the log-likelihood \(\ell(\alpha, \beta)\): \[ \ell(\alpha, \beta) = \ln L(\alpha, \beta) = \sum_{i=1}^n \ln \left( \frac{\beta^\alpha}{\Gamma(\alpha)} x_i^{\alpha - 1} e^{-\beta x_i} \right) \] Expanding this expression: \[ \ell(\alpha, \beta) = \sum_{i=1}^n (\alpha \ln \beta - \ln \Gamma(\alpha) + (\alpha - 1) \ln x_i - \beta x_i) \] Simplifying further, we get: \[ \ell(\alpha, \beta) = n \alpha \ln \beta - n \ln \Gamma(\alpha) + (\alpha - 1) \sum_{i=1}^n \ln x_i - \beta \sum_{i=1}^n x_i \]

To find the maximum likelihood estimate, we need to take the derivatives with respect to \(\alpha\) and \(\beta\) and set them equal to zero.

\[ \frac{\partial \ell}{\partial \beta} = \frac{n \alpha}{\beta} - \sum_{i=1}^n x_i \] Setting this to zero: \[ \frac{n \alpha}{\beta} = \sum_{i=1}^n x_i \] Solving for \(\beta\): \[ \beta = \frac{n \alpha}{\sum_{i=1}^n x_i} \]

The derivative with respect to \(\alpha\) requires using the derivative of \(\ln \Gamma(\alpha)\), which is \(\psi(\alpha) = \frac{d}{d\alpha} \ln \Gamma(\alpha)\), where \(\psi(\alpha)\) is the Digamma function. \[ \frac{\partial \ell}{\partial \alpha} = n \ln \beta - n \psi(\alpha) + \sum_{i=1}^n \ln x_i \]

Method of Moment

Calculate Sample Moments:

Sample mean: \(\hat{\mu} = \frac{1}{n} \sum_{i=1}^{n} \hat{\sigma}_i^{-2}\)

Sample variance:\(\hat{s}^2 = \frac{1}{n-1} \sum_{i=1}^{n} \left( \hat{\sigma}_i^{-2} - \hat{\mu} \right)^2\)

According to the expectation and variance of the Gamma distribution, we have: \[ \frac{d_{\text{prior}}}{\beta_{\text{prior}}} = \hat{\mu} \] \[ \frac{d_{\text{prior}}}{\beta_{\text{prior}}^2} = \hat{s}^2 \]

By solving these two equations, we obtain: \[ \beta_{\text{prior}} = \frac{\hat{\mu}}{\hat{s}^2} \] \[ d_{\text{prior}} = \frac{\hat{\mu}^2}{\hat{s}^2} \]